/* the dofile processes the institution-level bank failure data from Paul Hellman 

	From Call report: start - name
	From HMDA: originations - end
*/

u "$data_raw_failure/riskDataTarp 2-12-20.dta",clear

drop if idVar=="16407.cu" & year=="2012"  // abnormally high second mortgage net charge-off ratio 
	
	
	
* ------------------------------------------ *
* 		      Preliminary checks			 *
* ------------------------------------------ *

order idVar, b(state)
ren idVar id 
la var id "Institution ID"
egen id_num = group(id)


** duplications
unique id year
duplicates list id year
drop if name == "EAGLE CREDIT UNION" // only one has a different name

** strings
destring year, replace
egen state_num = group(state)



* ------------------------------------------ *
* 		     	 Variables					 *
* ------------------------------------------ *

********************************************
*      		  rescale
ren total_assets assets
ren amt_total_loans loans
foreach x in assets loans coreDeposits {
	replace `x' = `x'/1e+06
	gen log_`x' = log(`x')
	sum `x'
}  
drop logAsset 

foreach x in avg_income avg_loan_amt{
	replace `x' = `x'/1e+06
}

replace tarpDisbursed = 0 if tarpDisbursed==.
replace tarpDisbursed = tarpDisbursed / 1e+06
replace tarpProfit = 0 if tarpProfit==.
replace tarpProfit = tarpProfit / 1e+06


** replace fist subprime and second subprime
gen subordinate_lien = originations - first_lien
replace pct_sub_prime_first_originatins = sub_prime_first_originations/first_lien
replace pct_sub_prime_second_originatns = sub_prime_second_originations/subordinate_lien

ren pct_num12_total_subprime pct_subprime_conv
ren num12_total_subprime subprime_conv
ren num12_total origination_conv
ren pct_applications pct_originations
ren pct_sub_prime_first_originatins pct_subprime_first
ren pct_sub_prime_second_originatns pct_subprime_second
ren pct_home_equity_loans_subprime  pct_subprime_homequity
ren pct_refi_loans_subprime pct_subprime_refi

foreach x in pct_originations pct_subprime pct_subprime_conv pct_subprime_first pct_subprime_second pct_subprime_homequity pct_subprime_refi delqRatio netChargeOffRatio marketShare {
	replace `x'=`x'*100
}
/* pctComm pct_sold_to_secondary keep as decimals. It is
   because their coefficients are otherwise too small in the regression */


  
   
********************************************
*      		  growth rate
xtset id_num year

/* We already have:
	loanGrowth, asetGrowth, it is exactly the same with D.loans/L.loans
*/ 

foreach x in application originations subprime subprime_conv{
	gen `x'Growth = D.`x'/L.`x'
}



********************************************
*      		  label

** Outcomes **
ta failure closeType,m  
	// not all clousures are failure, only forced merger counts as failure
	
gen fail = failure == "Yes"
gen fail_liquidation = failure == "Yes" & closeType == "Liquidation"
gen fail_merger = failure == "Yes" & closeType == "Merger"
la var fail "failure"


* subprime lending  (from HMDA)
la var pct_subprime "subprime origination, \%"  // = subprime/originations
la var pct_originations "origination, \%" // = originations/applications
la var pct_subprime_conv "conv subprime origination, \%" // subprime_conv/ origiantion_conv

la var pct_subprime_first  "subprime first origination, \%" // need first_originations
la var pct_subprime_second "subprime second origination, \%"  // need second_originations

la var pct_subprime_homequity "subprime home equity origination, \% "
la var pct_subprime_refi "subprime refinance origination, \%"

// no purchase_loans_subprime



* quality of loans in total (Call report)
la var delqRatio "loan delinquency, \%"
la var netChargeOffRatio "loan chargeoff, \%"


* quality of mortgages only (Call report)
gen delqRatio_mort = delqMortgages/mortgages * 100
gen netChargeOffRatio_mort = netChargeMortgages/mortgages * 100 
la var delqRatio_mort "mortgage deliquency, \%"
la var netChargeOffRatio_mort "mortgage chargeoff, \%"

gen delqRatio_fstmort = delqFirstMortgages/firstMortgages * 100
gen netChargeOffRatio_fstmort = netChargeFirstMortgages/firstMortgages * 100 
la var delqRatio_fstmort "first mortgage deliquency, \%"
la var netChargeOffRatio_fstmort "first mortgage chargeoff, \%"

gen delqRatio_secmort = delqSecondMortgages/secondMortgages * 100
gen netChargeOffRatio_secmort = netChargeSecondMortgages/secondMortgages * 100 
la var delqRatio_secmort "second mortgage deliquency, \%"
la var netChargeOffRatio_secmort "second mortgage chargeoff, \%"
	

* TARP fundings, or were similar banks more likely to receive government bail-out funding compared to credit unions?
gen tarpfunded = (tarpDisbursed > 0)  // it is equivalent to (tarpProfit != 0)

	

** Institution type dummies **
gen bank = instType == "bk"
gen cu = instType == "cu"

order bank cu fail fail_* incomplete assets, a(year) 




** Institutional controls ** 
* size
la var assets "total assets, million"
la var log_assets "log total assets, million"
la var numBranch "number of branches"
la var marketShare "market share, \%"
la var numState "number of states with branches"
la var loans "log total loans, million"


* loan portfolio
	// pctComm pctRes pctConst pctCons pctAg pctOth
la var pctComm "commercial loans, decimal"  // it's missing for 2003 CU since no infor available
la var pctRes "real estate loans, decimal" 
la var pctConst "loans for construction, decimal" 
la var pctCons "consumer loans, decimal" 
la var pctAg "agricultural loans, decimal"
la var pctOth "other loans,decimal"  // negative why?


* how banks and credit unions fund their lending and manage their capital and liquidity. 
	// liquidity: cashHoldings
	// use deposite as a source of loan: coreDeposits, should it be divided by total_assets?
	// net worth ratio: capAd 
// la var cashHoldings "cash holdings, million"
la var coreDeposits "core deposit, million"
// la var log_cashHoldings "log cash holdings, million"
la var log_coreDeposits "log core deposit, million"
la var capAd "net worth ratio"  // what unit? 
	
	
* secondary market: 
	// pct_sold_to_secondary or secondary_mortgage	
	// Does it measure the ability of loan lending, i.e., issuance cost?
la var pct_sold_to_secondary "loans to secondary market, decimal"

gen pct_sold_to_secondary_mort = secondMortgages /mortgages  // 65 with 0 mortgages
la var pct_sold_to_secondary_mort "mortgages to secondary market, decimal"


* profitibility
la var roa "return on assets"  // what unit? 

	
	
** state controls **
	// avgSaHPI unemployment hpiGrowth
la var avgSaHPI "house price index"
replace unemployment = unemployment / 100
la var unemployment "unemployment rate, decimal"
la var hpiGrowth "house price index growh rate, decimal"


** borrower controls **
la var avg_income " average borrower income, million"
la var avg_loan_amt "average loan amount, million"
la var pct_male "male borrower, decimal"
la var pct_white "white borrower, decimal"
la var pct_black "black borrower, decimal"





** Other important variables **

* these two from HMDA
gen x = applications==0
ta x //  without applications
drop x


// ren orginations originations
gen x = originations==0
ta x     //  without originations
ta x cu  //  banks, 143 cu, comparable to the total #banks/#cu
drop x


* these two from call report
gen x = mortgages == 0  
ta x 
drop x   // 48 without outstanding mortgages

gen x = loans==0
ta x     // 4 without loans
drop x



*
sum loans avgCapAd mortgages originations subprime sold_to_secondary // 8 missing for avgCapAd, none others with missing 

bysort cu: sum $Institution  // for banks:
							 // pctComm, pctRes pctCons, pctAg numBranch and numState miss several (6)
							 // pct_sold_to_secondary miss around 100

gen log_originations = log(originations)
gen log_applications = log(applications)

gen approval_rate = originations / applications
la var originations "origination"
la var applications "application"
la var  approval_rate "approval rate"
la var log_originations "log origination"
la var log_applications "log application"



	
	
* ------------------------------------------ *
* 		         issues check		    	 *
* ------------------------------------------ *
** institutions with incomplete years
sort id year
	// id-year obs missing not because of small total assets and they are not indicated as failure, why?
save "$data_clean_failure/riskdata_analysis.dta",replace

collapse (mean) fail, by(instType year)
replace fail = fail * 100
save "$data_clean_failure/failure.dta",replace

